/**
 * 
 */
package edu.umd.clip.lm.programs;

import java.io.File;
import java.io.IOException;
import java.lang.management.*;

import com.sleepycat.je.*;

import edu.berkeley.nlp.util.*;

import edu.umd.clip.jobs.*;
import edu.umd.clip.lm.model.*;
import edu.umd.clip.lm.model.Experiment.*;
import edu.umd.clip.lm.model.training.*;
import edu.umd.clip.lm.storage.*;
import edu.umd.clip.lm.util.tree.BinaryTreeIterator;
/**
 * Command-line program that populates the probability-tree storage of one
 * language model of an experiment.
 * <p>
 * The experiment is loaded from an XML config file, a {@link JobManager} is
 * started on a daemon thread, and a {@link YetAnotherClusterPopulator} is run
 * against either a Berkeley DB backed storage ({@code -bdb}) or a compact
 * storage (the default).
 *
 * @author Denis Filimonov <den@cs.umd.edu>
 *
 */
public class LMClusterPopulator {
	/**
	 * Command-line options; the fields are filled in by {@link OptionParser}.
	 */
	public static class Options {
		@Option(name = "-config", required = true, usage = "XML config file")
		public String config;
		@Option(name = "-jobs", required = false, usage = "number of concurrent jobs (default: 1)")
		public int jobs = 1;
		@Option(name = "-lm", required = true, usage = "LM ID to populate")
		public String lm;
		@Option(name = "-datadir", required = false, usage = "the directory for the temporary files")
		public String datadir = null;
		// NOTE: this flag is not read anywhere in this class; compact storage
		// is used whenever -bdb is not given.
		@Option(name = "-compact", required = false, usage = "use Compact storage (default: false)")
		public boolean useCompact = false;
		@Option(name = "-bdb", required = false, usage = "use Berkeley DB storage (default: false)")
		public boolean useBDB = false;
	}

	/**
	 * Program entry point: parses the options, initializes the experiment and
	 * the job manager, prepares the working directory and populates the
	 * selected storage.
	 *
	 * @param args command-line arguments, see {@link Options}
	 * @throws Exception if initialization, directory creation or population fails
	 */
	public static void main(String[] args) throws Exception {
		OptionParser optParser = new OptionParser(Options.class);
		final Options opts = (Options) optParser.parse(args, true);

		Experiment.initialize(opts.config);
		final Experiment experiment = Experiment.getInstance();
		experiment.buildPrefixes();
		experiment.buildWordPrefixes();

		JobManager.initialize(opts.jobs, opts.jobs + 2);
		// Daemon thread, so the job manager does not keep the JVM alive once main returns.
		Thread thread = new Thread(JobManager.getInstance(), "Job Manager");
		thread.setDaemon(true);
		thread.start();

		LanguageModel lm = experiment.getLM(opts.lm);
		if (lm == null) {
			// Fail with a readable message instead of a NullPointerException below.
			throw new IllegalArgumentException("Unknown LM ID: " + opts.lm);
		}
		Files files = experiment.getFiles();

		File tmpDir = opts.datadir == null ? new File("populate-" + lm.getId()) : new File(opts.datadir);
		// mkdirs() also returns false when somebody else created the directory
		// in the meantime, hence the second isDirectory() check.
		if (!tmpDir.isDirectory() && !tmpDir.mkdirs() && !tmpDir.isDirectory()) {
			throw new IOException("Cannot create directory: " + tmpDir.getAbsolutePath());
		}

		if (opts.useBDB) {
			populateBDB(lm, files, tmpDir);
		} else {
			populateCompact(lm, tmpDir);
		}
	}

	/**
	 * Populates the model into a Berkeley DB backed storage. The storage and
	 * the environment are closed even when population fails.
	 *
	 * @param lm     the language model to populate
	 * @param files  experiment file locations; supplies the database location
	 * @param tmpDir working directory handed to the populator
	 * @throws Exception if opening, populating or closing the storage fails
	 */
	private static void populateBDB(LanguageModel lm, Files files, File tmpDir) throws Exception {
		final Environment env = BDBProbTreeStorage.createEnvironment(files.getDb(), true);
		try {
			// Environment.getConfig() hands out a copy, so changing it has no
			// effect on the open environment; mutable properties such as the
			// cache size must be applied through setMutableConfig().
			EnvironmentMutableConfig mutableConfig = env.getMutableConfig();
			mutableConfig.setCachePercent(50);
			env.setMutableConfig(mutableConfig);

			final BDBProbTreeStorage storage = new BDBProbTreeStorage(env);
			storage.open(lm.getId(), lm.getIdNum(), true);
			try {
				doPopulate(lm, storage, tmpDir);
			} finally {
				storage.close(lm.getIdNum());
			}
		} finally {
			env.close();
		}
	}

	/**
	 * Populates the model into a compact storage whose cache is sized to a
	 * quarter of the maximum heap, then prints the storage statistics to
	 * standard error.
	 *
	 * @param lm     the language model to populate
	 * @param tmpDir working directory handed to the populator
	 * @throws Exception if opening, populating or closing the storage fails
	 */
	private static void populateCompact(LanguageModel lm, File tmpDir) throws Exception {
		final MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();

		MemoryUsage memuse = memoryBean.getHeapMemoryUsage();
		long cacheSize = memuse.getMax() / 4;

		CompactProbTreeStorage storage = new CompactProbTreeStorage(cacheSize);

		// The storage is opened with the largest cluster id found in the history tree.
		int maxClusterId = 0;
		for (BinaryTreeIterator<HistoryTreePayload> it = lm.getHistoryTree().getPostOrderIterator(); it.hasNext(); ) {
			HistoryTreePayload p = it.next();
			if (p.clusterid > maxClusterId) {
				maxClusterId = p.clusterid;
			}
		}

		storage.openForWriting(lm.getIdNum(), maxClusterId);

		doPopulate(lm, storage, tmpDir);

		System.err.printf("CompactStorage: %s\n", storage.getStats());

		// Closed only after a successful run, as before: what close() does to a
		// partially written store is not visible from this file.
		storage.close(lm.getIdNum());
	}

	/**
	 * Runs a {@link YetAnotherClusterPopulator} for the model against the
	 * given, already opened storage.
	 *
	 * @param lm      the language model to populate
	 * @param storage the storage that receives the populated data
	 * @param tmpDir  working directory passed to the populator's initialization
	 * @throws DatabaseException if the populator reports a database failure
	 * @throws IOException       if the populator reports an I/O failure
	 */
	private static void doPopulate(LanguageModel lm, AbstractProbTreeStorage storage, File tmpDir) throws DatabaseException, IOException {
		YetAnotherClusterPopulator populator = new YetAnotherClusterPopulator(lm, storage);

		populator.initialize(tmpDir);
		populator.populate();
	}
}
